/* 
*** Downstream effects by representation of women in the union (incumbent data)
	// input: inclevel_dataset, unionboards_SDSR_clean
	// output: TableA15 (sheet: retentionfem, retentionmal)
*/ 

* Open a fresh log for this run; `capture' keeps the do-file going if no log
* is open yet or the log file cannot be opened.
capture log close
capture log using "$logs/inclevel_heterog1_log", replace

* Data toggles
* singest   -- 0: no restriction; 1: worker's baseline establishment must be a single-establishment firm
* bothFM    -- 0: no restriction; 1: worker's baseline establishment must employ both men and women at baseline
* covered   -- 0: no restriction; 1: worker's baseline establishment must be in the geographic coverage of the CBA
* signing   -- 0: no restriction; 1: worker's baseline establishment must be a signing establishment
* geoglevel -- "": no geography-year FEs; "state_mode": state-year FEs; "microregion_mode": microregion-year FEs
* no2011    -- 0: include 2011; 1: exclude 2011
* restr     -- 0: no restriction; 1: worker must be 18 or older and not on probation at baseline
local singest   0
local bothFM    1
local covered   1
local signing   1
local geoglevel "microregion_mode"
local no2011    1
local restr     0

* Load the incumbent-level data (replaces whatever is in memory)
use "$files/inclevel_dataset.dta", clear

* Sample restrictions: each filter is applied only when its toggle is non-zero.
* Rows whose flag is anything other than 1 (including missing) are removed.
if `singest' != 0 {
	drop if singestwork != 1
}
if `bothFM' != 0 {
	drop if bothFMwork != 1
}
if `covered' != 0 {
	drop if coverwork != 1
}
if `signing' != 0 {
	drop if signwork != 1
}
if `restr' != 0 {
	// rows with missing blage are retained: missing is not < 18 in Stata
	drop if blage < 18
	// prob flags baseline tenure of 3 or less (presumably months on probation -- TODO confirm units)
	gen prob = (blten<=3)
	keep if prob != 1
}

* Time-varying fixed effects: each *FE variable is a group id for a
* (year x category) cell and is later picked up by the `*FE' wildcard in absorb().
//industry: blind with its last three / last two digits dropped
gen ind2d_num = floor(blind/1e3)
gen ind3d_num = floor(blind/1e2)
egen indyrFE = group(year ind2d_num)
//geography: the renamed variables match the values the `geoglevel' toggle may take
rename blstate state_mode
rename blmr microregion_mode
egen geogyrFE = group(year `geoglevel')
//tenure: baseline tenure in bins of width 3
gen ten3m = floor(blten/3)
egen tenyrFE = group(year ten3m)

* For clustering.
* clonevar (rather than gen) keeps the storage type of blempl. A plain `gen'
* creates a float, which cannot represent integers above 16,777,216 exactly;
* if blempl is a large numeric identifier, distinct employers would be rounded
* to the same value and silently merged into one cluster.
clonevar clustergrp = blempl

* Event-study variables
* post is 1 from 2015 onwards, 0 before, and missing when year is missing
generate post = (year>=2015) if year!=.
generate treatpost = treatwork*post
generate postgender = post*gender
generate treatgender = treatwork*gender

* One indicator per calendar year (yearj1, yearj2, ...), then treatment x year
* interactions. The loop bound and labels hard-code seven years, 2011-2017.
tabulate year, generate(yearj)

forvalues k = 1/7 {
	generate interj`k' = treatwork*yearj`k'
	label variable interj`k' "`=2010+`k''"
}

* The 2014 interaction is overwritten with a constant zero
generate zero = 0
label variable zero "2014"
replace interj4 = zero

* Inverse of employment: for weighting regressions
generate blfeminv = 1/blfem
generate blmalinv = 1/blmal

* Own-gender inverse employment: female value when gender==1, male value when
* gender==0, missing for any other gender value
generate blinv = cond(gender==1, blfeminv, cond(gender==0, blmalinv, .))

* Outcome variables
label variable atblemp "At baseline employer"

* Heterogeneity
//share of women at baseline
gen sharefemale_2014 = blfem/blemp
* tag() marks one row per establishment so the median below is taken over
* establishments rather than over worker-year rows
egen unique_estab = tag(fakeid_estab)
sum sharefemale_2014 if unique_estab==1, d
* The local is kept for any later use, but the comparison below reads r(p50)
* directly: a local stores the number as decimal text, which can differ from
* the stored value in the last digits and misclassify establishments that sit
* exactly at the median.
local median = r(p50)
gen high_sharefemale_2014 = (sharefemale_2014>r(p50)) if sharefemale_2014!=.
//union board info
* Assign each worker the union_id observed in 2014 on all of their rows:
* xxx holds union_id on 2014 rows only, yyy is the per-worker mode of xxx
* (minmode takes the smallest value on ties). Workers with no 2014 value end up
* with a missing union_id.
* NOTE(review): xxx and yyy are created with the default storage type; if
* union_id is a numeric id above 16,777,216 it would be rounded here -- confirm
* the range/type of union_id.
gen xxx = union_id if year==2014
egen yyy = mode(xxx), by(fakeid_worker) minmode
replace union_id = yyy
drop xxx yyy
// Union-board characteristics at baseline, built from the boards file and
// merged onto the worker data by union_id:
// 1) Share of women in union board at baseline
// 2) female Pres/VP at baseline
preserve
	* load only the variables needed from the union boards file
	use union_id sh_female_bl female_president_bl female_vicepresident_bl union_type ///
		using "$files/unionboards_SDSR_clean.dta", clear
	//tag unions with info on boards at baseline
	generate has_bl = (sh_female_bl!=. & female_president_bl!=.)
	* collapse fully identical rows; the m:1 merge below needs union_id to be unique
	duplicates drop
	summarize sh_female_bl, detail
	summarize sh_female_bl if union_type=="Trabalhador", detail
	* "high" means a baseline female board share above one third
	generate high_sh_female_bl = (sh_female_bl>1/3) if sh_female_bl!=.
	* row-wise maximum of the president and vice-president indicators
	egen female_PorVP = rowmax( female_president_bl female_vicepresident_bl)
	tabulate high_sh_female_bl
	tabulate high_sh_female_bl if union_type=="Trabalhador"
	tabulate female_PorVP
	tabulate female_PorVP if union_type=="Trabalhador"
	tempfile boardinfo
	save `boardinfo'
restore
* keep every worker row (matched or not); unions found only in the boards file are discarded
merge m:1 union_id using `boardinfo', keep(master match)

* SOME COLUMNS ON TABLE WILL BE RESTRICTED TO HAVING INFO ON BOARD AT BASELINE
* Diagnostics: how many unions / workers lack baseline board information
* (has_bl!=1 also counts rows where has_bl is missing, i.e. no match in the merge)
tabulate has_bl
unique union_id
unique union_id if has_bl!=1
unique fakeid_worker
unique fakeid_worker if has_bl!=1
mdesc high_sh_female_bl female_PorVP high_sharefemale_2014

* Regressions
* xl defaults to the value used when 2011 is kept and is overridden when 2011
* is dropped (xl is not referenced in the visible part of this file)
local xl = 4.5
if `no2011' != 0 {
	drop if year==2011
	drop interj1
	local xl = 3.5
}

//female; childbearing
* estimation sample shared by every command in this section
local femsample gender==1 & childbearing==1

* main results: treatpost coefficient with worker and *FE fixed effects,
* weighted by inverse baseline female employment, clustered on clustergrp
local i = 0
foreach y of varlist atblemp {
	local ++i
	reghdfe `y' treatpost if `femsample' [aweight=blfeminv], absorb(fakeid_worker *FE) cluster(clustergrp)
	* mean of the outcome among treated workers in 2014, reported with the estimates
	summarize `y' if `femsample' & treatwork == 1 & year==2014
	estadd scalar meandv=`r(mean)'
	estadd scalar obs=e(N_full)
	estimates store al`i'
}

* interact with low female share (union boards)
* dummy = 1 when the union board is NOT high-female-share; missing when unknown
generate dummy = (high_sh_female_bl==0) if high_sh_female_bl!=.
local i = 0
foreach y of varlist atblemp {
	local ++i
	reghdfe `y' treatpost##dummy treatwork##dummy post##dummy if `femsample' [aweight=blfeminv], absorb(fakeid_worker *FE) cluster(clustergrp)
	estadd scalar obs=e(N_full)
	* total effect for the dummy==1 group: main effect plus interaction
	quietly lincom 1.treatpost + 1.treatpost#1.dummy
	estadd scalar sumdum =r(estimate)
	* p-value for that total effect being zero
	quietly test 1.treatpost + 1.treatpost#1.dummy = 0
	estadd scalar sumdump =r(p)
	quietly summarize `y' if `femsample' & year == 2014 & treatwork == 1
	estadd scalar meandv=r(mean)
	estimates store bl`i'
}
drop dummy

*** Export ***
* Write the stored estimates to a tab-delimited text file, read that file back
* in, and save it as the "retentionfem" sheet of TableA15.xlsx. preserve/restore
* protects the analysis data, which import delimited replaces.
preserve
forvalues j = 1/1 {
	estout al`j' bl`j' using "$tables/T4_1het_incfemcball`j'.txt", ///
		style(tab) mlabels(none) label collabels(none) ///
		cells(b(star fmt(%9.3f)) se(par)) ///
		stats(sumdum sumdump meandv obs, fmt(%9.3f %9.3f %9.2fc %9.0fc ) labels("sum" "p-value" "mean depvar" "N" )) ///
		drop(o.*, relax) keep(treatpost 1.treatpost 1.treatpost#1.dummy, relax) replace starlevels(* 0.10 ** 0.05 *** 0.01)
}

* name of the first column (the outcome) and the top row of the sheet
local firstcol atblemp
local toprow 1

import delimited using "$tables/T4_1het_incfemcball1.txt", clear
* columns: row label, main specification, low-female-union-board interaction
rename (v1 v2 v3) (`firstcol' main low_femunion)

export excel using "$tables/TableA15.xlsx", sheet("retentionfem") cell(A`toprow') firstrow(var) sheetreplace

restore

//male; childbearing
* estimation sample shared by every command in this section
local malsample gender==0 & childbearing==1

* main results: treatpost coefficient with worker and *FE fixed effects,
* weighted by inverse baseline male employment, clustered on clustergrp
local i = 0
foreach y of varlist atblemp {
	local ++i
	reghdfe `y' treatpost if `malsample' [aweight=blmalinv], absorb(fakeid_worker *FE) cluster(clustergrp)
	* mean of the outcome among treated workers in 2014, reported with the estimates
	summarize `y' if `malsample' & treatwork == 1 & year==2014
	estadd scalar meandv=`r(mean)'
	estadd scalar obs=e(N_full)
	estimates store al`i'
}

* interact with low female share (union boards)
* dummy = 1 when the union board is NOT high-female-share; missing when unknown
generate dummy = (high_sh_female_bl==0) if high_sh_female_bl!=.
local i = 0
foreach y of varlist atblemp {
	local ++i
	reghdfe `y' treatpost##dummy treatwork##dummy post##dummy if `malsample' [aweight=blmalinv], absorb(fakeid_worker *FE) cluster(clustergrp)
	estadd scalar obs=e(N_full)
	* total effect for the dummy==1 group: main effect plus interaction
	quietly lincom 1.treatpost + 1.treatpost#1.dummy
	estadd scalar sumdum =r(estimate)
	* p-value for that total effect being zero
	quietly test 1.treatpost + 1.treatpost#1.dummy = 0
	estadd scalar sumdump =r(p)
	quietly summarize `y' if `malsample' & year == 2014 & treatwork == 1
	estadd scalar meandv=r(mean)
	estimates store bl`i'
}
drop dummy

*** Export ***
* Write the stored estimates to a tab-delimited text file, read that file back
* in, and save it as the "retentionmal" sheet of TableA15.xlsx. preserve/restore
* protects the analysis data, which import delimited replaces.
preserve
forvalues j = 1/1 {
	estout al`j' bl`j' using "$tables/T4_1het_incmalcball`j'.txt", ///
		style(tab) mlabels(none) label collabels(none) ///
		cells(b(star fmt(%9.3f)) se(par)) ///
		stats(sumdum sumdump meandv obs, fmt(%9.3f %9.3f %9.2fc %9.0fc ) labels("sum" "p-value" "mean depvar" "N" )) ///
		drop(o.*, relax) keep(treatpost 1.treatpost 1.treatpost#1.dummy, relax) replace starlevels(* 0.10 ** 0.05 *** 0.01)
}

* name of the first column (the outcome) and the top row of the sheet
local firstcol atblemp
local toprow 1

import delimited using "$tables/T4_1het_incmalcball1.txt", clear
* columns: row label, main specification, low-female-union-board interaction
rename (v1 v2 v3) (`firstcol' main low_femunion)

export excel using "$tables/TableA15.xlsx", sheet("retentionmal") cell(A`toprow') firstrow(var) sheetreplace

restore

* close the log opened at the top of the do-file (no error if none is open)
capture log close
